#!/usr/bin/env python
#coding=utf-8
from BeautifulSoup import BeautifulSoup 
import urllib,time,socket,bsddb
socket.setdefaulttimeout(5)

import utils,settings
from ex import NoHistData

# Prefix of every downloaded CSV path: the main loop stores each file at
# csvdir + '/<code>.csv'.  The two settings values are concatenated as-is.
csvdir = settings.root + settings.history
# URL template of the history page; '%s' is filled with the stock code
# in get_csvurl().
u = 'http://cn.finance.yahoo.com/q/hp?s=%s.ss'
# Codes for which get_csvurl() found no download link; printed at the end
# of the run.
codes_no_hist = []

# Path of the log *file* (despite the name, '/history.log' is appended).
logdir = settings.root + settings.log + '/history.log'
# Opened in 'w' mode, so the log of a previous run is truncated on start-up.
log = open(logdir, 'w')

def get_csvurl(code):
    """Return the CSV download URL found on the history page of *code*.

    The page at ``u % code`` is fetched through ``utils.get_soup`` and
    searched for the text node that contains the download-link label; the
    ``href`` of that node's parent tag is returned.

    Args:
        code: stock code substituted into the ``u`` URL template.

    Returns:
        The ``href`` attribute of the tag enclosing the download label.

    Raises:
        NoHistData: if the page contains no download label.  Before raising,
            the code is appended to ``codes_no_hist`` and written to ``log``.
    """
    page = utils.get_soup(u % code)
    # Label of the download link on the page ("download to workbook").
    # Relies on the file's utf-8 coding declaration.
    label = u'下载到工作簿'
    link_text = page.find(text=lambda node: label in node)
    if not link_text:
        codes_no_hist.append(code)
        # One code per line; without the newline successive codes would run
        # together in the log file.
        log.write('%s\n' % code)
        log.flush()
        raise NoHistData(code)

    # NOTE(review): assumes the label's parent tag carries an href -- the
    # item lookup fails otherwise; confirm against the real page markup.
    return link_text.parent['href']

c = utils.get_scoder()
#c.set_location('600393')
while True:
    try:
        time.sleep(0.5)
        code = unicode(c.next()[0])
    except KeyError:
        break   
    
    try:   
        url2 = get_csvurl(code)
    
    
        fn = '/%s.csv'%code
        fn = csvdir + fn
   
        urllib.urlretrieve(url2, fn)
        
    except NoHistData,e:
        print e
        continue    
    except (socket.timeout,IOError), e:
        
        try:
            c.previous()
        # 如果 code 正好是第一个 :-( ...    
        except bsddb._bsddb.DBNotFoundError:
            c.close()
            c = utils.get_scoder()
        print '\n', code, ':', e, '\n'
        continue
    else:
        print code
        
print 'NO Hist Data Found:',codes_no_hist        
log.close()
c.close()
